In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
from datetime import datetime
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
%matplotlib inline 
In [2]:
!ls
2019_nCoV_20200121_20200206.csv
Coronavirus.ipynb
geodata.csv
time_series_2019-ncov-Confirmed.csv
time_series_2019-ncov-Deaths.csv
time_series_2019-ncov-Recovered.csv
In [2]:
#Available data up to 2/22/2020
confirmed = pd.read_csv('time_series_2019-ncov-Confirmed.csv').fillna(0)
death = pd.read_csv('time_series_2019-ncov-Deaths.csv').fillna(0)
recovered = pd.read_csv('time_series_2019-ncov-Recovered.csv').fillna(0)
In [3]:
#convert column name to datetime format
confirmedtimeframe = confirmed.iloc[:,4:len(confirmed.columns)]
confirmedtimeframe.columns = pd.to_datetime(confirmedtimeframe.columns).date
deathtimeframe = death.iloc[:,4:len(death.columns)]
deathtimeframe.columns = pd.to_datetime(deathtimeframe.columns).date
recoveredtimeframe = recovered.iloc[:,4:len(recovered.columns)]
recoveredtimeframe.columns = pd.to_datetime(recoveredtimeframe.columns).date
In [4]:
confirmed = pd.concat([confirmed.iloc[:,0:4],confirmedtimeframe],axis = 1,sort=True)
death = pd.concat([death.iloc[:,0:4], deathtimeframe],axis = 1,sort=True)
recovered = pd.concat([recovered.iloc[:,0:4], recoveredtimeframe],axis = 1,sort=True)
In [5]:
#Melt indivdual timeframe data to values under 'Date'
confirmed = confirmed.melt(id_vars=["Province/State", "Country/Region",'Lat','Long'], var_name="Date", value_name="Confirmed")
death = death.melt(id_vars=["Province/State", "Country/Region",'Lat','Long'], var_name="Date", value_name="Death")
recovered = recovered.melt(id_vars=["Province/State", "Country/Region",'Lat','Long'], var_name="Date", value_name="Recovered")
In [6]:
#Consolidate dataframe that's ready to be used
data = pd.concat([confirmed, death.iloc[:,5],recovered.iloc[:,5]], axis =1, sort = True)
In [7]:
#returns all provinces and states affected
confirmed['Province/State'].unique()
Out[7]:
array(['Anhui', 'Beijing', 'Chongqing', 'Fujian', 'Gansu', 'Guangdong',
       'Guangxi', 'Guizhou', 'Hainan', 'Hebei', 'Heilongjiang', 'Henan',
       'Hubei', 'Hunan', 'Inner Mongolia', 'Jiangsu', 'Jiangxi', 'Jilin',
       'Liaoning', 'Ningxia', 'Qinghai', 'Shaanxi', 'Shandong',
       'Shanghai', 'Shanxi', 'Sichuan', 'Tianjin', 'Tibet', 'Xinjiang',
       'Yunnan', 'Zhejiang', 0, 'Taiwan', 'Seattle, WA', 'Chicago, IL',
       'Tempe, AZ', 'Macau', 'Hong Kong', 'Toronto, ON',
       'British Columbia', 'Orange, CA', 'Los Angeles, CA',
       'New South Wales', 'Victoria', 'Queensland', 'London, ON',
       'Santa Clara, CA', 'South Australia', 'Boston, MA',
       'San Benito, CA', 'Madison, WI', 'Diamond Princess cruise ship',
       'San Diego County, CA', 'San Antonio, TX',
       'Omaha, NE (From Diamond Princess)',
       'Travis, CA (From Diamond Princess)', 'From Diamond Princess',
       'Lackland, TX (From Diamond Princess)', 'Humboldt County, CA',
       'Sacramento County, CA'], dtype=object)
In [8]:
#returns all country and regions affected
confirmed['Country/Region'].unique()
Out[8]:
array(['Mainland China', 'Thailand', 'Japan', 'South Korea', 'Taiwan',
       'US', 'Macau', 'Hong Kong', 'Singapore', 'Vietnam', 'France',
       'Nepal', 'Malaysia', 'Canada', 'Australia', 'Cambodia',
       'Sri Lanka', 'Germany', 'Finland', 'United Arab Emirates',
       'Philippines', 'India', 'Italy', 'UK', 'Russia', 'Sweden', 'Spain',
       'Belgium', 'Others', 'Egypt', 'Iran', 'Israel', 'Lebanon'],
      dtype=object)
In [9]:
#separate China region from and other regions worldwide
chinaregion = data.loc[(data['Country/Region'] == 'Mainland China')|(data['Country/Region'] == 'Taiwan')|
                                     (data['Country/Region'] =='Hong Kong')|(data['Country/Region'] =='Macau'),:]
ind = data.loc[(data['Country/Region'] == 'Mainland China')|(data['Country/Region'] == 'Taiwan')|
                                     (data['Country/Region'] =='Hong Kong')|(data['Country/Region'] =='Macau'),:].index
nonchinaregion = data.drop(data.index[ind],inplace = False).reset_index(drop = True)
In [10]:
chinaregionpivot = pd.pivot_table(chinaregion,index=["Country/Region","Province/State",'Date'], 
                                  values = ['Confirmed','Recovered','Death'],
                                  aggfunc = np.mean)
In [11]:
nonchinaregionpivot = pd.pivot_table(nonchinaregion,index=["Country/Region","Province/State",'Date'], 
                      values = ['Confirmed','Recovered','Death'],
                      aggfunc = np.mean)
In [12]:
#Function to extract timeframe table per location
#region: China: chinaregionpivot/nonchinaregionpivot
#country: Country/Region
#state: Province/State
def getlocationdata(region, country, state):
    location = region.loc[(country, state,), :]
    return location
In [13]:
#Insert region, country, state values for the location in question and rename the timeframe table
Hubei = getlocationdata(chinaregionpivot, 'Mainland China', 'Hubei')
In [14]:
plt.figure(figsize=(16,8))
Hubei.loc[:,'Confirmed'].sort_index().plot.line(color='b',marker='o',linestyle ='-.')
plt.ylabel('Number of Patient')
plt.title('Confirmed in Hubei')
plt.legend()
Out[14]:
<matplotlib.legend.Legend at 0x1a8b34f4978>
In [15]:
plt.figure(figsize=(16,8))
Hubei.loc[:,'Death'].sort_index().plot.line(color='r',marker='x',linestyle ='--')
Hubei.loc[:,'Recovered'].sort_index().plot.line(color='g',marker='o',linestyle ='--')
plt.ylabel('Number of Patient')
plt.title('Death and Recovery in Hubei')
plt.legend()
Out[15]:
<matplotlib.legend.Legend at 0x1a8b3ee0a20>
In [16]:
#Get the latest data
chinalatest = pd.DataFrame(chinaregion.groupby(['Province/State'])['Lat','Long','Confirmed','Recovered','Death'].agg('max')).sort_values(by=['Confirmed'], ascending = False).reset_index()
In [17]:
fig = px.bar(chinalatest, x='Province/State', y='Confirmed',color = 'Province/State',color_discrete_sequence= px.colors.qualitative.Set3,
             hover_data=['Confirmed'], title = 'Confirmed in China Region')
fig.show()
In [18]:
fig = px.bar(chinalatest.loc[chinalatest['Province/State'] != 'Hubei',:], x='Province/State', y='Confirmed',color = 'Province/State',
             color_discrete_sequence= px.colors.qualitative.Set3,
             hover_data=['Confirmed'], title = 'Confirmed in China Region Excluding Hubei')
fig.show()
In [19]:
nonchinalatest = pd.DataFrame(nonchinaregion.groupby(['Country/Region'])['Lat','Long','Confirmed','Recovered','Death'].agg('max')).sort_values(by = 'Confirmed', ascending = False).reset_index()
In [39]:
fig = px.bar(nonchinalatest.loc[nonchinalatest['Country/Region'] != 'Others',:], x='Country/Region', y='Confirmed',color = 'Country/Region',
             color_discrete_sequence= px.colors.qualitative.Set3,
             hover_data=['Confirmed'], title = 'Confirmed outside China')
fig.show()
In [23]:
chinalatest['Death Rate'] = chinalatest['Death']/chinalatest['Confirmed']
chinalatest['Recover Rate'] = chinalatest['Recovered']/chinalatest['Confirmed']
nonchinalatest['Death Rate'] = nonchinalatest['Death']/nonchinalatest['Confirmed']
nonchinalatest['Recover Rate'] = nonchinalatest['Recovered']/nonchinalatest['Confirmed']
In [24]:
#Death v.s. Confirmed in other part of China
fig = px.scatter(chinalatest.loc[chinalatest['Province/State'] != 'Hubei',:], x="Confirmed", y="Death", size = 'Recover Rate',
                 color="Province/State",color_discrete_sequence= px.colors.qualitative.Plotly,size_max=60,
                title = 'Deaths in Confirmed in Other Part of China')
fig.show()
In [25]:
#Death v.s. Confirmed outside China
fig = px.scatter(nonchinalatest, x="Confirmed", y="Death", size = 'Recover Rate',
                 color="Country/Region",color_discrete_sequence= px.colors.qualitative.Plotly,size_max=60,
                title = 'Deaths in Confirmed outside China')
fig.show()
In [26]:
#region: China: chinaregionpivot/nonchinaregionpivot
#country: Country/Region
#state: Province/State (enter 0 if shows '0')
def getincremental(region, country, state): 
    incc = []
    incd = []
    incr = []
    for i in range(len(getlocationdata(region, country, state))-1):
        c = getlocationdata(region, country, state)['Confirmed'][i+1]-getlocationdata(region, country, state)['Confirmed'][i]
        d = getlocationdata(region, country, state)['Death'][i+1]-getlocationdata(region, country, state)['Death'][i]
        r = getlocationdata(region, country, state)['Recovered'][i+1]-getlocationdata(region, country, state)['Recovered'][i]
        incc.append(c)
        incd.append(d)
        incr.append(r)

    dic = {'Incremental Confirmed': incc, 'Incremental Death': incd, 'Incremental Recovered': incr}

    incremental = pd.DataFrame(data = dic)
    incremental['Day'] = np.arange(1,len(incremental)+1,1)
    return incremental
In [27]:
#Specify the region you want to explore
region = chinaregionpivot
country = 'Mainland China'
state = 'Hubei'
incremental = getincremental(region, country, state)
incremental
Out[27]:
Incremental Confirmed Incremental Death Incremental Recovered Day
0 0 0 0 1
1 105 7 3 2
2 212 16 1 3
3 297 12 10 4
4 365 24 3 5
5 2131 49 35 6
6 0 0 8 7
7 1349 37 2 8
8 903 42 51 9
9 1347 45 27 10
10 4024 101 127 11
11 2345 64 91 12
12 3156 65 136 13
13 2987 70 111 14
14 2447 69 184 15
15 2841 81 298 16
16 2147 81 324 17
17 2531 91 356 18
18 2097 103 427 19
19 1638 94 417 20
20 0 0 47 21
21 14840 242 773 22
22 6200 147 1315 23
23 1843 139 849 24
24 1933 100 1016 25
25 1807 93 1223 26
26 1693 132 1266 27
27 349 108 1209 28
28 411 115 1451 29
29 220 0 93 30
30 1422 202 3418 31
In [28]:
#Plot trend of incremental confirms 
fig = go.Figure()
fig.add_trace(go.Scatter(x = incremental['Day'], y=incremental['Incremental Confirmed'], name='Confirmed',
                         mode='lines+markers',line=dict(color='firebrick', width=2, dash='dash')))
if state != 0:
    fig.update_layout(title='Incremental Confirms in ' + state,
                       xaxis_title='Days',
                       yaxis_title='Confirms')
else:
    fig.update_layout(title='Incremental Confirms in ' + country,
                       xaxis_title='Days',
                       yaxis_title='Confirms')
fig.show()
In [29]:
fig = go.Figure()
fig.add_trace(go.Scatter(x = incremental['Day'], y=incremental['Incremental Death'],
                    name='Death', mode='lines+markers',line=dict(color='firebrick', width=2, dash='dot')))
fig.add_trace(go.Scatter(x = incremental['Day'], y=incremental['Incremental Recovered'],
                    name='Recovered', mode='lines+markers',line=dict(color='royalblue', width=2,dash='dot')))
if state != 0:
    fig.update_layout(title='Incremental Death and Recovery in ' + state,
                   xaxis_title='Days',
                   yaxis_title='Number of Patients')
else:
    fig.update_layout(title='Incremental Death and Recovery in ' + country,
                   xaxis_title='Days',
                   yaxis_title='Number of Patients')
fig.show()
In [30]:
chinaregion['Date'] = pd.to_datetime(chinaregion['Date'])
chinaregion['Date'] = chinaregion['Date'].dt.strftime('%b.%d')
nonchinaregion['Date'] = pd.to_datetime(nonchinaregion['Date'])
nonchinaregion['Date'] = nonchinaregion['Date'].dt.strftime('%b.%d')
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [35]:
#Spreading in all China regions
fig = px.scatter_geo(chinaregion, lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed', animation_frame="Date",
                    hover_name="Province/State", size_max=20,
                    color_continuous_scale=px.colors.sequential.Burg)
fig.update_layout(title='Confirmed in China thru Timeline')
fig.show()
In [36]:
#Spreading in other China regions (Excluding Hubei)
fig = px.scatter_geo(chinaregion.loc[chinaregion['Province/State'] != 'Hubei',:], lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed', animation_frame="Date", 
                    hover_name="Province/State", size_max=10,
                    color_continuous_scale=px.colors.sequential.Burg)
fig.update_layout(title='Confirmed in China (excluding Hubei) thru Timeline')
fig.show()
In [37]:
#Spreading in all regions outside China
fig = px.scatter_geo(nonchinaregion, lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed',animation_frame="Date",
                    hover_name="Country/Region", size_max=20,
                    color_continuous_scale=px.colors.sequential.Burg)
fig.update_layout(title='Confirmed outside China thru Timeline')
fig.show()
In [38]:
#Spreading in all named regions outside China
fig = px.scatter_geo(nonchinaregion.loc[(nonchinaregion['Country/Region'] != 'Others')|(nonchinaregion['Country/Region'] != 0),:], 
                     lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed', animation_frame="Date",
                    hover_name="Province/State", size_max=30,
                    color_continuous_scale=px.colors.sequential.Burg)
fig.update_layout(title='Confirmed Outside China thru Timeline (only includes named regions)')
fig.show()